---
title: "Turkish Sentiment"
author: "Omer Zarpli"
date: "4/1/2021"
output: html_document
---

Tutorial explaining the approach (sentiment analysis in R with a custom lexicon using tidytext):
https://www.programmingwithr.com/sentiment-analysis-in-r-with-custom-lexicon-dictionary-using-tidytext/

Source of the Turkish lexicon:
https://github.com/amrrs/sentiment-turkish-tidytext

```{r}
library(tidyverse)
library(tidytext)
library(readxl)
```

# Load Lexicon and Data
```{r}
# Download the Turkish lexicon from GitHub and save a local copy.
# NOTE(review): this runs on every execution of the chunk and needs both
# network access and an existing data/ directory - confirm that is intended.
lexicon <- read.csv("https://raw.githubusercontent.com/amrrs/sentiment-turkish-tidytext/master/turkish_lexicon.csv")
write.csv(lexicon, "data/turkish_lexicon.csv", row.names = FALSE)

# Load the saved lexicon, keeping only the word and its polarity score
# (renamed to `word` and `value`), with `word` stored as character.
lexicon <- read.csv("data/turkish_lexicon.csv") %>%
  select(c("WORD", "POLARITY")) %>%
  rename("word" = "WORD", "value" = "POLARITY") %>%
  mutate(word = as.character(word))

# Load a semicolon-separated lexicon file from the working directory.
# This assignment replaces the `lexicon` object built just above.
lexicon <- read.table("turkish_lexicon.csv",
                      header = TRUE,
                      sep = ";",
                      stringsAsFactors = FALSE)
```




```{r}

#### Data preparation and analysis start here ####

# Lexicon for the main analysis, scored with the "POLARITY" column.
# WORD and POLARITY are renamed to `word` and `value`; TONE is kept unchanged.
lexicon <- read_excel("Turkish Lexicon .xlsx") %>%
  select(WORD, POLARITY, TONE) %>%
  rename(word = WORD, value = POLARITY)

# Survey data: drop rows where QID54 equals "-99", then store QID54 as text.
sent <- read_csv("data/no_typos.csv") %>%
  filter(QID54 != "-99") %>%
  mutate(across(QID54, as.character))

```

```{r}
# Score each response: split QID54 into words, look up each word's value in
# the lexicon, and sum those values per response.
out <- sent %>%
  # Row index used to regroup words by response. row_number() adapts to the
  # actual number of rows in `sent` and matches the key rebuilt for the
  # join further down.
  mutate(linenumber = row_number()) %>%
  unnest_tokens(word, QID54) %>% # tokenization - response text to words
  # Keep only words present in the lexicon; a response with no matching
  # word has no rows left and therefore does not appear in `out`.
  inner_join(lexicon, by = "word") %>%
  group_by(linenumber) %>% # one group per response
  summarise(sentiment = sum(value)) %>% # response polarity = sum of word values
  # Put the original text and the other survey columns next to the score.
  left_join(
    sent %>%
      mutate(linenumber = row_number()),
    by = "linenumber"
  )

# Binary treatment indicator: Treatment1 = 0, Treatment3 = 1.
# Any other FL_5_DO value matches no case_when() branch and becomes NA.
out <- out %>%
  mutate(
    treat_uncert = case_when(
      FL_5_DO == "Treatment1" ~ 0,
      FL_5_DO == "Treatment3" ~ 1
    )
  )

# NOTE(review): the model regresses on FL_5_DO, not on the treat_uncert
# indicator created above - confirm which predictor is intended.
r <- lm(sentiment ~ FL_5_DO, data = out)
summary(r)
```











